import requests
from bs4 import BeautifulSoup
import re
from w3lib import html

# Base URL of the SCU Academic Affairs Office site; relative article paths
# scraped below are joined onto this.
url="http://jwc.scu.edu.cn"
# News-list pages to crawl (first page plus two numbered archive pages).
pageUrl=["http://jwc.scu.edu.cn/xwbd/xwbd.htm","http://jwc.scu.edu.cn/xwbd/xwbd/83.htm", "http://jwc.scu.edu.cn/xwbd/xwbd/82.htm"]
# Parallel accumulator lists: index y of each list describes the same article.
subUrl=[]        # absolute URL of each article detail page
information=[]   # date/metadata line of each article
passage=[]       # full body text of each article
imgUrl=[]        # absolute URLs of all images found, across all articles
picNum=0         # NOTE(review): never updated in this chunk — appears unused
pagePicNum=[]    # number of images found on each article page
title=[]         # headline text of each article


# Crawl each news-list page, collecting article headlines into `title` and
# absolute detail-page URLs into `subUrl` (parallel lists).
for listPage in pageUrl:
    # timeout prevents the script hanging forever on a stalled connection
    response = requests.get(listPage, timeout=10)
    # Use the detected encoding so Chinese text decodes correctly.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")
    for entry in soup.find_all('div', class_='list-e-main fl'):
        heading = entry.find('h5', class_='text-over')
        if heading is None:
            # Malformed list entry without a headline — skip instead of crashing.
            continue
        title.append(heading.get_text())
        # The heading's anchor href looks like "/info/....htm"; extract it.
        links = re.findall(r'/info.*htm', str(heading))
        if links:
            subUrl.append(url + links[0])
# Visit each article page, collecting its date line, body text and image URLs.
# Placeholders are appended on any parse failure so the parallel lists
# (information / passage / pagePicNum) stay index-aligned with subUrl.
for articleUrl in subUrl:
    response = requests.get(articleUrl, timeout=10)
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")
    content = soup.find('div', class_='list-a-content')
    if content is None:
        # Unexpected page layout — record empty results and keep going.
        information.append('')
        passage.append('')
        pagePicNum.append(0)
        continue
    dateTag = content.find('p', class_='page-date')
    information.append(dateTag.get_text() if dateTag is not None else '')
    body = content.find('div', class_='v_news_content')
    if body is None:
        passage.append('')
        pagePicNum.append(0)
        continue
    passage.append(body.get_text())
    images = body.find_all('img', class_="img_vsb_content")
    for img in images:
        # orisrc holds the original image path, e.g. orisrc="/__local/...jpg".
        # Capture the quoted value directly — the old
        # ''.join(re.findall(...))[1:] + "jpg" hack stripped the leading quote
        # by position and corrupted the URL when zero or multiple matches hit.
        match = re.search(r'orisrc="([^"]+?\.jpg)"', str(img))
        if match:
            imgUrl.append(url + match.group(1))
    pagePicNum.append(len(images))
